package us.codecraft.netmagic.samples;

import us.codecraft.netmagic.downloader.ResourceHttpClientDownloader;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.model.AfterExtractor;
import us.codecraft.webmagic.model.OOSpider;
import us.codecraft.webmagic.model.annotation.ExtractBy;
import us.codecraft.webmagic.model.annotation.ExtractByUrl;
import us.codecraft.webmagic.model.annotation.HelpUrl;
import us.codecraft.webmagic.model.annotation.TargetUrl;
import us.codecraft.webmagic.scheduler.RedisScheduler;

/**
 * Annotation-driven page model for the comic pages under {@code http://manhua.fzdm.com/2/}.
 *
 * <p>For every page matching one of the {@link TargetUrl} patterns, the image URL and the two
 * numbers embedded in the page URL are extracted into the fields below.
 * {@link #afterProcess(Page)} then queues a request for the image, tagged with a relative file
 * path of the form {@code <cap>/<page>.jpg}.
 *
 * @author code4crafter@gmail.com <br>
 *         Date: 13-8-11 <br>
 *         Time: 4:12 PM <br>
 */
@TargetUrl({"http://manhua.fzdm.com/2/\\d+[/]?", "http://manhua.fzdm.com/2/\\d+/index_\\d+.html"})
@HelpUrl("http://manhua.fzdm.com/2/*")
public class ManhuaPageProcessor implements AfterExtractor {

    /** URL the crawl starts from. */
    private static final String START_URL = "http://manhua.fzdm.com/2/";

    /** User-Agent header sent with every request. */
    private static final String USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.65 Safari/537.31";

    /** Minimum number of digits of the directory name built from {@link #cap}. */
    private static final int CAP_WIDTH = 3;

    /** Minimum number of digits of the file name built from {@link #page}. */
    private static final int PAGE_WIDTH = 2;

    /** Source URL of the comic image ({@code src} of the {@code img} element with id {@code mhpic}). */
    @ExtractBy(value = "//img[@id='mhpic']/@src", notNull = true)
    private String imgSrc;

    /**
     * First number in the page URL (presumably the chapter number).
     *
     * <p>The part after the number is optional so that this pattern also matches chapter URLs
     * without a trailing slash, which the first {@link TargetUrl} pattern accepts.
     */
    @ExtractByUrl("http://manhua\\.fzdm\\.com/2/(\\d+).*")
    private String cap;

    /** Number taken from {@code index_N.html}; stays {@code null} when the URL has no such part. */
    @ExtractByUrl("http://manhua\\.fzdm\\.com/2/\\d+/index_(\\d+)\\.html")
    private String page;

    /**
     * Starts the crawl with 5 threads, a Redis-backed scheduler on {@code 127.0.0.1} and a
     * {@link ResourceHttpClientDownloader} created with {@code /data/webmagic/}.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        OOSpider.create(
                        Site.me()
                                .setSleepTime(10)
                                .setRetryTimes(3)
                                .addStartUrl(START_URL)
                                .setUserAgent(USER_AGENT),
                        ManhuaPageProcessor.class)
                .scheduler(new RedisScheduler("127.0.0.1"))
                .downloader(new ResourceHttpClientDownloader("/data/webmagic/"))
                .thread(5)
                .run();
    }

    /**
     * Queues a request for the extracted image after the fields have been populated.
     *
     * <p>{@link #cap} is left-padded with zeros to {@value #CAP_WIDTH} digits and {@link #page}
     * (defaulting to {@code "0"} when absent) to {@value #PAGE_WIDTH} digits. The resulting
     * {@code <cap>/<page>.jpg} is attached to the request under
     * {@link ResourceHttpClientDownloader#FILE_PATH}, and the request is printed to stdout.
     *
     * @param page the page this model was extracted from; receives the image request
     */
    @Override
    public void afterProcess(Page page) {
        if (this.cap == null) {
            // Without the number from the URL no file path can be built; skip instead of
            // failing with a NullPointerException.
            System.err.println("Cannot determine target directory, skipping image " + imgSrc);
            return;
        }
        this.cap = padWithZeros(this.cap, CAP_WIDTH);
        // Pages whose URL has no index_N.html part are stored as page "00".
        this.page = padWithZeros(this.page == null ? "0" : this.page, PAGE_WIDTH);

        String filePath = this.cap + "/" + this.page + ".jpg";
        Request request = new Request(imgSrc)
                .setPriority(99.0)
                .putExtra(ResourceHttpClientDownloader.FILE_PATH, filePath);
        page.addTargetRequest(request);
        System.out.println(request);
    }

    /** Left-pads {@code value} with {@code '0'} until it is at least {@code width} characters long. */
    private static String padWithZeros(String value, int width) {
        if (value.length() >= width) {
            return value;
        }
        StringBuilder padded = new StringBuilder(width);
        for (int i = value.length(); i < width; i++) {
            padded.append('0');
        }
        return padded.append(value).toString();
    }
}
